library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(ggraph)
## Loading required package: ggplot2
library(SIN)
load('hw2_data.rdata') #loading brain data
print(head(mts[,1:5]))
## 1R 2R 3R 4R 5R
## [1,] 817.2553 764.5825 853.8753 725.1292 874.2730
## [2,] 811.1212 757.5109 852.0288 716.7457 862.5044
## [3,] 810.4676 754.3622 851.5202 715.4726 858.6204
## [4,] 811.4496 757.1581 853.0572 715.2271 859.4206
## [5,] 816.9825 773.2166 849.6543 720.0938 873.2568
## [6,] 817.1985 758.6393 853.8532 727.8290 874.3244
n_samples <-nrow(mts) # 240 rows >The rows are the observation time
n_features <-ncol(mts) # 81 cols
## Association Measure: True Corrolation matrix R
R <-cor(mts)
#Take a look of R
print(head(R[,1:10]))
## 1R 2R 3R 4R 5R 6R 7R
## 1R 1.0000000 0.9787156 0.9686066 0.4802085 -0.0317727 0.9637880 -0.2500584
## 2R 0.9787156 1.0000000 0.9894943 0.3747475 -0.1651396 0.9839382 -0.3969232
## 3R 0.9686066 0.9894943 1.0000000 0.4067454 -0.1433323 0.9687131 -0.3736153
## 4R 0.4802085 0.3747475 0.4067454 1.0000000 0.6591555 0.3913439 0.4808600
## 5R -0.0317727 -0.1651396 -0.1433323 0.6591555 1.0000000 -0.1131626 0.7946566
## 6R 0.9637880 0.9839382 0.9687131 0.3913439 -0.1131626 1.0000000 -0.3401281
## 8R 9R 10R
## 1R 0.9602842 0.9465585 0.89658330
## 2R 0.9865838 0.9725521 0.90593281
## 3R 0.9687650 0.9570605 0.91803415
## 4R 0.3476701 0.3768572 0.36104956
## 5R -0.1571435 -0.1151885 -0.05276126
## 6R 0.9966783 0.9935345 0.91090679
#Initiate the bootstrap
B = 1000
DeltaB.nboot = rep(NA, B)
set.seed(123)
for (b in 1:B){
# nonparametric sampling
idx<-sample( 1:n_samples,replace = TRUE )
x_boot=mts[idx,]
R_star <- cor(x_boot) # bootstrapped R
DeltaB.nboot[b] <- sqrt( n_samples ) * (max (abs(R_star-R) ))
}
# Show the histogram
hist(DeltaB.nboot, probability = T, breaks = 20, col = "purple", border = "black", main = "Bootstrapped DeltaB", add =)
# Bootstrapped ECDF:
f_hat <-ecdf( (1/B)*(DeltaB.nboot) )
f_hat
## Empirical CDF
## Call: ecdf((1/B) * (DeltaB.nboot))
## x[1:1000] = 0.0014549, 0.0015041, 0.0015721, ..., 0.0069121, 0.0069777
# Graph F_hat
par(mfrow = c(1,2))
plot(f_hat, main = "F_hat", col = "orchid", lwd = 3)
##### Finally, we can build simultaneous confidence set by using previous variables we have.Consider the sample quantile at level \(1 -\alpha\) of the bootsrapped distribution \(\hat{F}_{n}(t)\) , we got the \[t_{\alpha}=hat{F}_{n}^-1(1-\alpha)\] #for the CI, we pick level \(\alpha\) at 0.1, so we have 90 % of confidence and show the confidence interval
# Pick the t_alpha value for aplha = 0.1
t_alpha <- quantile(DeltaB.nboot, 1 - 0.1)
# Calculate confidence set for all correlation matrix R
lower.bound = R - (t_alpha/sqrt(n_samples))
upper.bound = R + (t_alpha/sqrt(n_samples))
# Show the confidence interval for one value of set R
round(
c(lower = R[10] - (t_alpha/sqrt(n_samples)),
upper = R[10] + (t_alpha/sqrt(n_samples))), 3
)
## lower.90% upper.90%
## 0.633 1.161
# Build a grah for correlation matrix we have
graph.R <- graph_from_adjacency_matrix(R, mode='undirected',weighted = "Correlation" ,add.colnames = NULL, add.rownames = NA)
#List of edges from of the graph
edges_ <- as_data_frame(graph.R, 'Edges')
#Take out the same variable (correlation value = 1)
edges_ <- edges_[abs(edges_$Correlation) !=1, ]
# Set the epsilon > 0
edges.1 <- edges_[abs(edges_$Correlation) > 0, ]
graph.R1 <- graph_from_data_frame(edges.1, FALSE)
# Make 4 group of correlation value from edge list
Correlation1 = cut_number((edges.1$ "Correlation"), 4)
# Set the color
color = c("#F0E442", "#0072B2", "#D55E00", "#CC79A7")
names(color) = levels(Correlation1)
# Plot the graph
ggraph(graph.R1, layout = 'linear', circular = TRUE) +
geom_edge_arc(aes(color=Correlation1))+
geom_node_point() +
geom_node_text(aes(label = name), repel=TRUE)
### Now, we pick different \(\epsilon\) to compare the result from before
# Set the epsilon = 0.5
edge.2<- edges_[abs(edges_$Correlation) > 0.5, ]
# Set the graph
graph.R2 <- graph_from_data_frame(edge.2, FALSE)
# Make 4 group of correlation value from
Correlation2 = cut_number((edge.2$ "Correlation"), 4)
# Set the color
color = c("#F0E442", "#0072B2", "#D55E00", "#CC79A7")
names(color) = levels(Correlation2)
# Plot the graph
ggraph(graph.R2, layout = 'linear', circular = TRUE) +
geom_edge_arc(aes(color=Correlation2))+
geom_node_point() +
geom_node_text(aes(label = name), repel=TRUE)
#### By comparing the both images, we can se by inception that the area of 47L(frontal cortex - left side) obviously loose so many connectivity as we increasing the \(\epsilon\) by 0.5. In the biolegical sense, Frontal cortex takes part in most human cognitive skills. Meaning that this particular region should connected to many functional nerves. #### Based on what we have done by changing the \(\epsilon\) value into quite extreme value, yielding the misintrepretation of model we need to observe (in this case, brain nerve connectivity). Therefore, picking the most suitable cut-off rate is important to draw a more reliable result. ### Now, we are going to repeat the analysis using linear partial correlation as implemented in SIN package. First, Build a p-value matrix for our dataset and see the result
# Build a matrix of simultaneous p-value graph
out.SIN=sinUG(R,n_samples)
# plot the result
plotUGpvalues(out.SIN)
#### Next, We build a connectivity matrix based on alpha that we pick. This time we choose \(\alpha\) at 0.05 (95% confidence). Then, show the result
par(mfrow = c(1,2))
#Set error level
alpha1 = 0.05
# build correlation matrix
edge.SIN1 = getgraph(out.SIN, alpha1)
print(head(edge.SIN1[,1:10]))
## 1R 2R 3R 4R 5R 6R 7R 8R 9R 10R
## 1R 0 1 0 0 0 0 0 0 0 0
## 2R 1 0 0 0 0 0 0 0 0 0
## 3R 0 0 0 0 0 0 0 0 0 0
## 4R 0 0 0 0 0 0 0 0 0 0
## 5R 0 0 0 0 0 0 0 0 0 0
## 6R 0 0 0 0 0 0 0 1 0 0
# Define the graph
graph.SIN1 = graph.adjacency(edge.SIN1, mode = "undirected")
ggraph(graph.SIN1, layout = 'linear', circular = TRUE) +
geom_edge_arc(aes(color="red"))+
geom_node_point() +
geom_node_text(aes(label = name), repel=TRUE)
#### Try to implement the analysis with different \(\alpha\). This time we pick 0.01
par(mfrow = c(1,2))
#Set error level
alpha2 = 0.01
# build correlation matrix
edge.SIN2 = getgraph(out.SIN, alpha2)
print(head(edge.SIN2[,1:10]))
## 1R 2R 3R 4R 5R 6R 7R 8R 9R 10R
## 1R 0 1 0 0 0 0 0 0 0 0
## 2R 1 0 0 0 0 0 0 0 0 0
## 3R 0 0 0 0 0 0 0 0 0 0
## 4R 0 0 0 0 0 0 0 0 0 0
## 5R 0 0 0 0 0 0 0 0 0 0
## 6R 0 0 0 0 0 0 0 1 0 0
# Define the graph
graph.SIN2 = graph.adjacency(edge.SIN2, mode = "undirected")
ggraph(graph.SIN2, layout = 'linear', circular = TRUE) +
geom_edge_arc(aes(color="red"))+
geom_node_point() +
geom_node_text(aes(label = name), repel=TRUE)
#### We can see as we decrease the \(\alpha\) , we loose connectivity on our brain connectivity model. #### Compared with using the previous method, in general we loose connectivity more using this analysis method. It is because the function getgraph from sin is put the same treshold on its own and at final graph its only 0 or 1 value. Compare to what we did on previous analysis, we put all value and build them as 4 categories of correlation, then we draw the connectivity based on the value we set up.